1
2
3
4
5
6
7
8
9
10
11 """Restriction Enzyme classes.
12
13 Notes about the various classes of the restriction enzyme implementation::
14
15 RestrictionType is the type of all restriction enzymes.
16 ----------------------------------------------------------------------------
17 AbstractCut implements some methods that are common to all enzymes.
18 ----------------------------------------------------------------------------
19 NoCut, OneCut,TwoCuts represent the number of double strand cuts
20 produced by the enzyme.
21 they correspond to the 4th field of the
22 rebase record emboss_e.NNN.
23 0->NoCut : the enzyme is not characterised.
24 2->OneCut : the enzyme produces one double strand cut.
25 4->TwoCuts : two double strand cuts.
26 ----------------------------------------------------------------------------
27 Meth_Dep, Meth_Undep represent the methylation susceptibility to
28 the enzyme.
29 Not implemented yet.
30 ----------------------------------------------------------------------------
31 Palindromic, if the site is palindromic or not.
32 NotPalindromic allow some optimisations of the code.
33 No need to check the reverse strand
34 with palindromic sites.
35 ----------------------------------------------------------------------------
36 Unknown, Blunt, represent the overhang.
37 Ov5, Ov3 Unknown is here for symmetry reasons and
38 correspond to enzymes that are not
39 characterised in rebase.
40 ----------------------------------------------------------------------------
41 Defined, Ambiguous, represent the sequence of the overhang.
42 NotDefined
43 NotDefined is for enzymes not characterised
44 in rebase.
45
46 Defined correspond to enzymes that display
47 a constant overhang whatever the sequence.
48 ex : EcoRI. G^AATTC -> overhang :AATT
49 CTTAA^G
50
51 Ambiguous : the overhang varies with the
52 sequence restricted.
53 Typically enzymes which cut outside their
54 restriction site or (but not always)
55 inside an ambiguous site.
56 ex:
57 AcuI CTGAAG(22/20) -> overhang : NN
58 AasI GACNNN^NNNGTC -> overhang : NN
59 CTGN^NNNNNCAG
60
61 note : these 3 classes refer to the overhang, not the site.
62 So the enzyme ApoI (RAATTY) is defined even if its
63 restriction site is ambiguous.
64
65 ApoI R^AATTY -> overhang : AATT -> Defined
66 YTTAA^R
67 Accordingly, blunt enzymes are always Defined even
68 when they cut outside their restriction site.
69 ----------------------------------------------------------------------------
70 Not_available, as found in rebase file emboss_r.NNN files.
71 Commercially_available
72 allow the selection of the enzymes
73 according to their suppliers to reduce the
74 quantity of results.
75 Also will allow the implementation of
76 buffer compatibility tables. Not
77 implemented yet.
78
79 the list of suppliers is extracted from
80 emboss_s.NNN
81 ----------------------------------------------------------------------------
82 """
83
84 from __future__ import print_function
85 from Bio._py3k import zip
86 from Bio._py3k import filter
87 from Bio._py3k import range
88
89 import re
90 import itertools
91
92 from Bio.Seq import Seq, MutableSeq
93 from Bio.Alphabet import IUPAC
94
95 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
96 from Bio.Restriction.Restriction_Dictionary import typedict
97 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
98
99 from Bio.Restriction.RanaConfig import *
100 from Bio.Restriction.PrintFormat import PrintFormat
108 """Check characters in a string (PRIVATE).
109
110 Remove digits and white space present in string. Allows any valid ambiguous
111 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
112
113 Other characters (e.g. symbols) trigger a TypeError.
114
115 Returns the string WITH A LEADING SPACE (!). This is for backwards
116 compatibility, and may in part be explained by the fact that
117 Bio.Restriction doesn't use zero based counting.
118 """
119
120 seq_string = "".join(seq_string.split()).upper()
121
122 for c in "0123456789":
123 seq_string = seq_string.replace(c, "")
124
125 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):
126 raise TypeError("Invalid character found in %s" % repr(seq_string))
127 return " " + seq_string
128
129
130 matching = {'A': 'ARWMHVDN', 'C': 'CYSMHBVN', 'G': 'GRSKBVDN',
131 'T': 'TYWKHBDN', 'R': 'ABDGHKMNSRWV', 'Y': 'CBDHKMNSTWVY',
132 'W': 'ABDHKMNRTWVY', 'S': 'CBDGHKMNSRVY', 'M': 'ACBDHMNSRWVY',
133 'K': 'BDGHKNSRTWVY', 'H': 'ACBDHKMNSRTWVY',
134 'B': 'CBDGHKMNSRTWVY', 'V': 'ACBDGHKMNSRWVY',
135 'D': 'ABDGHKMNSRTWVY', 'N': 'ACBDGHKMNSRTWVY'}
136
137 DNA = Seq
242
245 """RestrictionType. Type from which derives all enzyme classes.
246
247 Implement the operator methods.
248 """
249
def __init__(cls, name='', bases=(), dct=None):
    """RE(name, bases, dct) -> RestrictionType instance.

    Validate the enzyme name and replace ``cls.compsite`` (a regular
    expression source string) with its compiled pattern object.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    Arguments:
     - name  - name of the enzyme class being created; must not contain
       a hyphen.
     - bases - accepted for the metaclass protocol, not used here.
     - dct   - accepted for the metaclass protocol, not used here.

    Raises ValueError if the name contains a hyphen or if ``cls.compsite``
    cannot be compiled as a regular expression.
    """
    if "-" in name:
        raise ValueError("Problem with hyphen in %s as enzyme name"
                         % repr(name))
    # Read the pattern once so the error message reports exactly the value
    # that failed to compile.
    pattern = cls.compsite
    try:
        cls.compsite = re.compile(pattern)
    except Exception as err:
        # Keep the historical message prefix and append the underlying
        # cause, which would otherwise be lost.
        raise ValueError(
            "Problem with regular expression, re.compiled(%s): %s"
            % (repr(pattern), err))
268
281
283 """RE.__div__(other) -> list.
284
285 RE/other
286 returns RE.search(other)."""
287 return cls.search(other)
288
290 """RE.__rdiv__(other) -> list.
291
292 other/RE
293 returns RE.search(other)."""
294 return cls.search(other)
295
297 """RE.__truediv__(other) -> list.
298
299 RE/other
300 returns RE.search(other)."""
301 return cls.search(other)
302
304 """RE.__rtruediv__(other) -> list.
305
306 other/RE
307 returns RE.search(other)."""
308 return cls.search(other)
309
311 """RE.__floordiv__(other) -> list.
312
313 RE//other
314 returns RE.catalyse(other)."""
315 return cls.catalyse(other)
316
318 """RE.__rfloordiv__(other) -> list.
319
320 other//RE
321 returns RE.catalyse(other)."""
322 return cls.catalyse(other)
323
325 """RE.__str__() -> str.
326
327 return the name of the enzyme."""
328 return cls.__name__
329
331 """RE.__repr__() -> str.
332
333 used with eval or exec will instantiate the enzyme."""
334 return "%s" % cls.__name__
335
337 """RE.__len__() -> int.
338
339 length of the recognition site."""
340 return cls.size
341
343
344
345 return id(cls)
346
348 """RE == other -> bool
349
350 True if RE and other are the same enzyme.
351
352 Specifically this checks they are the same Python object.
353 """
354
355 return id(cls) == id(other)
356
358 """RE != other -> bool.
359 isoschizomer strict, same recognition site, same restriction -> False
360 all the other-> True
361
362 WARNING - This is not the inverse of the __eq__ method.
363 """
364 if not isinstance(other, RestrictionType):
365 return True
366 elif cls.charac == other.charac:
367 return False
368 else:
369 return True
370
372 """RE >> other -> bool.
373
374 neoschizomer : same recognition site, different restriction. -> True
375 all the others : -> False
376 """
377 if not isinstance(other, RestrictionType):
378 return False
379 elif cls.site == other.site and cls.charac != other.charac:
380 return True
381 else:
382 return False
383
385 """a % b -> bool.
386
387 Test compatibility of the overhang of a and b.
388 True if a and b have compatible overhang.
389 """
390 if not isinstance(other, RestrictionType):
391 raise TypeError(
392 'expected RestrictionType, got %s instead' % type(other))
393 return cls._mod1(other)
394
396 """a >= b -> bool.
397
398 a is greater or equal than b if the a site is longer than b site.
399 if their site have the same length sort by alphabetical order of their
400 names."""
401 if not isinstance(other, RestrictionType):
402 raise NotImplementedError
403 if len(cls) > len(other):
404 return True
405 elif cls.size == len(other) and cls.__name__ >= other.__name__:
406 return True
407 else:
408 return False
409
411 """a > b -> bool.
412
413 sorting order:
414 1. size of the recognition site.
415 2. if equal size, alphabetical order of the names."""
416 if not isinstance(other, RestrictionType):
417 raise NotImplementedError
418 if len(cls) > len(other):
419 return True
420 elif cls.size == len(other) and cls.__name__ > other.__name__:
421 return True
422 else:
423 return False
424
426 """a <= b -> bool.
427
428 sorting order:
429 1. size of the recognition site.
430 2. if equal size, alphabetical order of the names.
431 """
432 if not isinstance(other, RestrictionType):
433 raise NotImplementedError
434 elif len(cls) < len(other):
435 return True
436 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
437 return True
438 else:
439 return False
440
442 """a < b -> bool.
443
444 sorting order:
445 1. size of the recognition site.
446 2. if equal size, alphabetical order of the names.
447 """
448 if not isinstance(other, RestrictionType):
449 raise NotImplementedError
450 elif len(cls) < len(other):
451 return True
452 elif len(cls) == len(other) and cls.__name__ < other.__name__:
453 return True
454 else:
455 return False
456
459 """Implement the methods that are common to all restriction enzymes.
460
461 All the methods are classmethod.
462
463 For internal use only. Not meant to be instantiated.
464 """
465
@classmethod
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    Return the list of all the sites of RE found in dna, as computed by
    RE._search().

    dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
    It is wrapped in a FormattedSeq built with the given linear flag,
    unless it already is a FormattedSeq, in which case it is used as is
    and linear is not consulted.

    if linear is False, the restriction sites that span over the
    boundaries will be included.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    The sequence being searched is stored on the class as cls.dna.
    """
    if not isinstance(dna, FormattedSeq):
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
494
495 @classmethod
497 """RE.all_suppliers -> print all the suppliers of R"""
498 supply = sorted(x[0] for x in suppliers_dict.values())
499 print(",\n".join(supply))
500 return
501
502 @classmethod
504 """RE.is_equischizomers(other) -> bool.
505
506 True if other is an equischizomer of RE.
507 False else.
508
509 equischizomer <=> same site, same position of restriction.
510 """
511 return not cls != other
512
513 @classmethod
515 """RE.is_neoschizomers(other) -> bool.
516
517 True if other is a neoschizomer of RE.
518 False else.
519
520 neoschizomer <=> same site, different position of restriction.
521 """
522 return cls >> other
523
524 @classmethod
526 """RE.is_isoschizomers(other) -> bool.
527
528 True if other is an isoschizomer of RE.
529 False else.
530
531 isoschizomer <=> same site."""
532 return (not cls != other) or cls >> other
533
534 @classmethod
536 """RE.equischizomers([batch]) -> list.
537
538 return a list of all the equischizomers of RE.
539 if batch is supplied it is used instead of the default AllEnzymes.
540
541 equischizomer <=> same site, same position of restriction.
542 """
543 if not batch:
544 batch = AllEnzymes
545 r = [x for x in batch if not cls != x]
546 i = r.index(cls)
547 del r[i]
548 r.sort()
549 return r
550
551 @classmethod
553 """RE.neoschizomers([batch]) -> list.
554
555 return a list of all the neoschizomers of RE.
556 if batch is supplied it is used instead of the default AllEnzymes.
557
558 neoschizomer <=> same site, different position of restriction."""
559 if not batch:
560 batch = AllEnzymes
561 r = sorted(x for x in batch if cls >> x)
562 return r
563
564 @classmethod
566 """RE.isoschizomers([batch]) -> list.
567
568 return a list of all the equischizomers and neoschizomers of RE.
569 if batch is supplied it is used instead of the default AllEnzymes.
570 """
571 if not batch:
572 batch = AllEnzymes
573 r = [x for x in batch if (cls >> x) or (not cls != x)]
574 i = r.index(cls)
575 del r[i]
576 r.sort()
577 return r
578
579 @classmethod
581 """RE.frequency() -> int.
582
583 frequency of the site."""
584 return cls.freq
585
586
587 -class NoCut(AbstractCut):
588 """Implement the methods specific to the enzymes that do not cut.
589
590 These enzymes are generally enzymes that have been only partially
591 characterised and the way they cut the DNA is unknown or enzymes for
592 which the pattern of cut is too complex to be recorded in Rebase
593 (ncuts values of 0 in emboss_e.###).
594
595 When using search() with these enzymes the values returned are at the start
596 of the restriction site.
597
598 Their catalyse() method raises a NotImplementedError.
599
600 Unknown and NotDefined are also part of the base classes of these enzymes.
601
602 Internal use only. Not meant to be instantiated.
603 """
604
605 @classmethod
607 """RE.cut_once() -> bool.
608
609 True if the enzyme cut the sequence one time on each strand."""
610 return False
611
612 @classmethod
614 """RE.cut_twice() -> bool.
615
616 True if the enzyme cut the sequence twice on each strand."""
617 return False
618
619 @classmethod
621 """RE._modify(location) -> int.
622
623 for internal use only.
624
625 location is an integer corresponding to the location of the match for
626 the enzyme pattern in the sequence.
627 _modify returns the real place where the enzyme will cut.
628
629 example::
630
631 EcoRI pattern : GAATTC
632 EcoRI will cut after the G.
633 so in the sequence:
634 ______
635 GAATACACGGAATTCGA
636 |
637 10
638 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
639 EcoRI cut after the G so:
640 EcoRI._modify(10) -> 11.
641
642 if the enzyme cut twice _modify will returns two integer corresponding
643 to each cutting site.
644 """
645 yield location
646
647 @classmethod
649 """RE._rev_modify(location) -> generator of int.
650
651 for internal use only.
652
653 as _modify for site situated on the antiparallel strand when the
654 enzyme is not palindromic
655 """
656 yield location
657
658 @classmethod
660 """RE.characteristic() -> tuple.
661
662 the tuple contains the attributes:
663 fst5 -> first 5' cut ((current strand) or None
664 fst3 -> first 3' cut (complementary strand) or None
665 scd5 -> second 5' cut (current strand) or None
666 scd3 -> second 3' cut (complementary strand) or None
667 site -> recognition site.
668 """
669 return None, None, None, None, cls.site
670
671
672 -class OneCut(AbstractCut):
673 """Implement the methods specific to the enzymes that cut the DNA only once
674
675 Correspond to ncuts values of 2 in emboss_e.###
676
677 Internal use only. Not meant to be instantiated.
678 """
679
680 @classmethod
682 """RE.cut_once() -> bool.
683
684 True if the enzyme cut the sequence one time on each strand.
685 """
686 return True
687
688 @classmethod
690 """RE.cut_twice() -> bool.
691
692 True if the enzyme cut the sequence twice on each strand.
693 """
694 return False
695
696 @classmethod
698 """RE._modify(location) -> int.
699
700 for internal use only.
701
702 location is an integer corresponding to the location of the match for
703 the enzyme pattern in the sequence.
704 _modify returns the real place where the enzyme will cut.
705
706 example::
707
708 EcoRI pattern : GAATTC
709 EcoRI will cut after the G.
710 so in the sequence:
711 ______
712 GAATACACGGAATTCGA
713 |
714 10
715 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
716 EcoRI cut after the G so:
717 EcoRI._modify(10) -> 11.
718
719 if the enzyme cut twice _modify will returns two integer corresponding
720 to each cutting site.
721 """
722 yield location + cls.fst5
723
724 @classmethod
726 """RE._rev_modify(location) -> generator of int.
727
728 for internal use only.
729
730 as _modify for site situated on the antiparallel strand when the
731 enzyme is not palindromic
732 """
733 yield location - cls.fst3
734
735 @classmethod
737 """RE.characteristic() -> tuple.
738
739 the tuple contains the attributes:
740 fst5 -> first 5' cut ((current strand) or None
741 fst3 -> first 3' cut (complementary strand) or None
742 scd5 -> second 5' cut (current strand) or None
743 scd3 -> second 3' cut (complementary strand) or None
744 site -> recognition site.
745 """
746 return cls.fst5, cls.fst3, None, None, cls.site
747
750 """Implement the methods specific to the enzymes that cut the DNA twice
751
752 Correspond to ncuts values of 4 in emboss_e.###
753
754 Internal use only. Not meant to be instantiated."""
755
756 @classmethod
758 """RE.cut_once() -> bool.
759
760 True if the enzyme cut the sequence one time on each strand."""
761 return False
762
763 @classmethod
765 """RE.cut_twice() -> bool.
766
767 True if the enzyme cut the sequence twice on each strand.
768 """
769 return True
770
771 @classmethod
773 """RE._modify(location) -> int.
774
775 for internal use only.
776
777 location is an integer corresponding to the location of the match for
778 the enzyme pattern in the sequence.
779 _modify returns the real place where the enzyme will cut.
780
781 example::
782
783 EcoRI pattern : GAATTC
784 EcoRI will cut after the G.
785 so in the sequence:
786 ______
787 GAATACACGGAATTCGA
788 |
789 10
790 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
791 EcoRI cut after the G so:
792 EcoRI._modify(10) -> 11.
793
794 if the enzyme cut twice _modify will returns two integer corresponding
795 to each cutting site.
796 """
797 yield location + cls.fst5
798 yield location + cls.scd5
799
800 @classmethod
802 """RE._rev_modify(location) -> generator of int.
803
804 for internal use only.
805
806 as _modify for site situated on the antiparallel strand when the
807 enzyme is not palindromic
808 """
809 yield location - cls.fst3
810 yield location - cls.scd3
811
812 @classmethod
814 """RE.characteristic() -> tuple.
815
816 the tuple contains the attributes:
817 fst5 -> first 5' cut ((current strand) or None
818 fst3 -> first 3' cut (complementary strand) or None
819 scd5 -> second 5' cut (current strand) or None
820 scd3 -> second 3' cut (complementary strand) or None
821 site -> recognition site.
822 """
823 return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site
824
827 """Implement the information about methylation.
828
829 Enzymes of this class possess a site which is methylable.
830 """
831
832 @classmethod
834 """RE.is_methylable() -> bool.
835
836 True if the recognition site is a methylable.
837 """
838 return True
839
842 """Implement information about methylation sensitivity.
843
844 Enzymes of this class are not sensitive to methylation.
845 """
846
847 @classmethod
849 """RE.is_methylable() -> bool.
850
851 True if the recognition site is a methylable.
852 """
853 return False
854
857 """Implement the methods specific to the enzymes which are palindromic
858
859 palindromic means : the recognition site and its reverse complement are
860 identical.
861 Remarks : an enzyme with a site CGNNCG is palindromic even if some
862 of the sites that it will recognise are not.
863 for example here : CGAACG
864
865 Internal use only. Not meant to be instantiated."""
866
867 @classmethod
869 """RE._search() -> list.
870
871 for internal use only.
872
873 implement the search method for palindromic and non palindromic enzyme.
874 """
875 siteloc = cls.dna.finditer(cls.compsite, cls.size)
876 cls.results = [r for s, g in siteloc for r in cls._modify(s)]
877 if cls.results:
878 cls._drop()
879 return cls.results
880
881 @classmethod
883 """RE.is_palindromic() -> bool.
884
885 True if the recognition site is a palindrome.
886 """
887 return True
888
891 """Implement the methods specific to the enzymes which are not palindromic
892
893 palindromic means : the recognition site and its reverse complement are
894 identical.
895
896 Internal use only. Not meant to be instantiated."""
897
898 @classmethod
900 """RE._search() -> list.
901
902 for internal use only.
903
904 implement the search method for palindromic and non palindromic enzyme.
905 """
906 iterator = cls.dna.finditer(cls.compsite, cls.size)
907 cls.results = []
908 modif = cls._modify
909 revmodif = cls._rev_modify
910 s = str(cls)
911 cls.on_minus = []
912 for start, group in iterator:
913 if group(s):
914 cls.results += [r for r in modif(start)]
915 else:
916 cls.on_minus += [r for r in revmodif(start)]
917 cls.results += cls.on_minus
918 if cls.results:
919 cls.results.sort()
920 cls._drop()
921 return cls.results
922
923 @classmethod
925 """RE.is_palindromic() -> bool.
926
927 True if the recognition site is a palindrome.
928 """
929 return False
930
933 """Implement the methods specific to the enzymes for which the overhang
934 is unknown.
935
936 These enzymes are also NotDefined and NoCut.
937
938 Internal use only. Not meant to be instantiated.
939 """
940
941 @classmethod
943 """RE.catalyse(dna, linear=True) -> tuple of DNA.
944 RE.catalyze(dna, linear=True) -> tuple of DNA.
945
946 return a tuple of dna as will be produced by using RE to restrict the
947 dna.
948
949 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
950
951 if linear is False, the sequence is considered to be circular and the
952 output will be modified accordingly.
953 """
954 raise NotImplementedError('%s restriction is unknown.'
955 % cls.__name__)
956 catalyze = catalyse
957
958 @classmethod
960 """RE.is_blunt() -> bool.
961
962 True if the enzyme produces blunt end.
963
964 see also:
965 RE.is_3overhang()
966 RE.is_5overhang()
967 RE.is_unknown()
968 """
969 return False
970
971 @classmethod
973 """RE.is_5overhang() -> bool.
974
975 True if the enzyme produces 5' overhang sticky end.
976
977 see also:
978 RE.is_3overhang()
979 RE.is_blunt()
980 RE.is_unknown()
981 """
982 return False
983
984 @classmethod
986 """RE.is_3overhang() -> bool.
987
988 True if the enzyme produces 3' overhang sticky end.
989
990 see also:
991 RE.is_5overhang()
992 RE.is_blunt()
993 RE.is_unknown()
994 """
995 return False
996
997 @classmethod
999 """RE.overhang() -> str. type of overhang of the enzyme.,
1000
1001 can be "3' overhang", "5' overhang", "blunt", "unknown"
1002 """
1003 return 'unknown'
1004
1005 @classmethod
1007 """RE.compatible_end() -> list.
1008
1009 list of all the enzymes that share compatible end with RE.
1010 """
1011 return []
1012
1013 @classmethod
1015 """RE._mod1(other) -> bool.
1016
1017 for internal use only
1018
1019 test for the compatibility of restriction ending of RE and other.
1020 """
1021 return False
1022
1023
1024 -class Blunt(AbstractCut):
1025 """Implement the methods specific to the enzymes for which the overhang
1026 is blunt.
1027
1028 The enzyme cuts the + strand and the - strand of the DNA at the same
1029 place.
1030
1031 Internal use only. Not meant to be instantiated.
1032 """
1033
1034 @classmethod
1036 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1037 RE.catalyze(dna, linear=True) -> tuple of DNA.
1038
1039 return a tuple of dna as will be produced by using RE to restrict the
1040 dna.
1041
1042 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1043
1044 if linear is False, the sequence is considered to be circular and the
1045 output will be modified accordingly.
1046 """
1047 r = cls.search(dna, linear)
1048 d = cls.dna
1049 if not r:
1050 return d[1:],
1051 fragments = []
1052 length = len(r) - 1
1053 if d.is_linear():
1054
1055
1056
1057 fragments.append(d[1:r[0]])
1058 if length:
1059
1060
1061
1062 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1063
1064
1065
1066 fragments.append(d[r[-1]:])
1067 else:
1068
1069
1070
1071 fragments.append(d[r[-1]:] + d[1:r[0]])
1072 if not length:
1073
1074
1075
1076 return tuple(fragments)
1077
1078
1079
1080 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1081 return tuple(fragments)
1082 catalyze = catalyse
1083
1084 @classmethod
1086 """RE.is_blunt() -> bool.
1087
1088 True if the enzyme produces blunt end.
1089
1090 see also:
1091 RE.is_3overhang()
1092 RE.is_5overhang()
1093 RE.is_unknown()
1094 """
1095 return True
1096
1097 @classmethod
1099 """RE.is_5overhang() -> bool.
1100
1101 True if the enzyme produces 5' overhang sticky end.
1102
1103 see also:
1104 RE.is_3overhang()
1105 RE.is_blunt()
1106 RE.is_unknown()
1107 """
1108 return False
1109
1110 @classmethod
1112 """RE.is_3overhang() -> bool.
1113
1114 True if the enzyme produces 3' overhang sticky end.
1115
1116 see also:
1117 RE.is_5overhang()
1118 RE.is_blunt()
1119 RE.is_unknown()
1120 """
1121 return False
1122
1123 @classmethod
1125 """RE.overhang() -> str. type of overhang of the enzyme.,
1126
1127 can be "3' overhang", "5' overhang", "blunt", "unknown"
1128 """
1129 return 'blunt'
1130
1131 @classmethod
1133 """RE.compatible_end() -> list.
1134
1135 list of all the enzymes that share compatible end with RE.
1136 """
1137 if not batch:
1138 batch = AllEnzymes
1139 r = sorted(x for x in iter(AllEnzymes) if x.is_blunt())
1140 return r
1141
1142 @staticmethod
1144 """RE._mod1(other) -> bool.
1145
1146 for internal use only
1147
1148 test for the compatibility of restriction ending of RE and other.
1149 """
1150 return issubclass(other, Blunt)
1151
1152
1153 -class Ov5(AbstractCut):
1154 """Implement the methods specific to the enzymes for which the overhang
1155 is recessed in 3'.
1156
1157 The enzyme cuts the + strand after the - strand of the DNA.
1158
1159 Internal use only. Not meant to be instantiated.
1160 """
1161
1162 @classmethod
1164 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1165 RE.catalyze(dna, linear=True) -> tuple of DNA.
1166
1167 return a tuple of dna as will be produced by using RE to restrict the
1168 dna.
1169
1170 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1171
1172 if linear is False, the sequence is considered to be circular and the
1173 output will be modified accordingly.
1174 """
1175 r = cls.search(dna, linear)
1176 d = cls.dna
1177 if not r:
1178 return d[1:],
1179 length = len(r) - 1
1180 fragments = []
1181 if d.is_linear():
1182
1183
1184
1185 fragments.append(d[1:r[0]])
1186 if length:
1187
1188
1189
1190 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1191
1192
1193
1194 fragments.append(d[r[-1]:])
1195 else:
1196
1197
1198
1199 fragments.append(d[r[-1]:] + d[1:r[0]])
1200 if not length:
1201
1202
1203
1204 return tuple(fragments)
1205
1206
1207
1208 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1209 return tuple(fragments)
1210 catalyze = catalyse
1211
1212 @classmethod
1214 """RE.is_blunt() -> bool.
1215
1216 True if the enzyme produces blunt end.
1217
1218 see also:
1219 RE.is_3overhang()
1220 RE.is_5overhang()
1221 RE.is_unknown()
1222 """
1223 return False
1224
1225 @classmethod
1227 """RE.is_5overhang() -> bool.
1228
1229 True if the enzyme produces 5' overhang sticky end.
1230
1231 see also:
1232 RE.is_3overhang()
1233 RE.is_blunt()
1234 RE.is_unknown()
1235 """
1236 return True
1237
1238 @classmethod
1240 """RE.is_3overhang() -> bool.
1241
1242 True if the enzyme produces 3' overhang sticky end.
1243
1244 see also:
1245 RE.is_5overhang()
1246 RE.is_blunt()
1247 RE.is_unknown()
1248 """
1249 return False
1250
1251 @classmethod
1253 """RE.overhang() -> str. type of overhang of the enzyme.,
1254
1255 can be "3' overhang", "5' overhang", "blunt", "unknown"
1256 """
1257 return "5' overhang"
1258
1259 @classmethod
1261 """RE.compatible_end() -> list.
1262
1263 list of all the enzymes that share compatible end with RE."""
1264 if not batch:
1265 batch = AllEnzymes
1266 r = sorted(x for x in iter(AllEnzymes) if x.is_5overhang() and
1267 x % cls)
1268 return r
1269
1270 @classmethod
1272 """RE._mod1(other) -> bool.
1273
1274 for internal use only
1275
1276 test for the compatibility of restriction ending of RE and other.
1277 """
1278 if issubclass(other, Ov5):
1279 return cls._mod2(other)
1280 else:
1281 return False
1282
1283
1284 -class Ov3(AbstractCut):
1285 """Implement the methods specific to the enzymes for which the overhang
1286 is recessed in 5'.
1287
1288 The enzyme cuts the - strand after the + strand of the DNA.
1289
1290 Internal use only. Not meant to be instantiated.
1291 """
1292
1293 @classmethod
1295 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1296 RE.catalyze(dna, linear=True) -> tuple of DNA.
1297
1298 return a tuple of dna as will be produced by using RE to restrict the
1299 dna.
1300
1301 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1302
1303 if linear is False, the sequence is considered to be circular and the
1304 output will be modified accordingly.
1305 """
1306 r = cls.search(dna, linear)
1307 d = cls.dna
1308 if not r:
1309 return d[1:],
1310 fragments = []
1311 length = len(r) - 1
1312 if d.is_linear():
1313
1314
1315
1316 fragments.append(d[1:r[0]])
1317 if length:
1318
1319
1320
1321 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1322
1323
1324
1325 fragments.append(d[r[-1]:])
1326 else:
1327
1328
1329
1330 fragments.append(d[r[-1]:] + d[1:r[0]])
1331 if not length:
1332
1333
1334
1335 return tuple(fragments)
1336
1337
1338
1339 fragments += [d[r[x]:r[x + 1]] for x in range(length)]
1340 return tuple(fragments)
1341 catalyze = catalyse
1342
1343 @classmethod
1345 """RE.is_blunt() -> bool.
1346
1347 True if the enzyme produces blunt end.
1348
1349 see also:
1350 RE.is_3overhang()
1351 RE.is_5overhang()
1352 RE.is_unknown()
1353 """
1354 return False
1355
1356 @classmethod
1358 """RE.is_5overhang() -> bool.
1359
1360 True if the enzyme produces 5' overhang sticky end.
1361
1362 see also:
1363 RE.is_3overhang()
1364 RE.is_blunt()
1365 RE.is_unknown()
1366 """
1367 return False
1368
1369 @classmethod
1371 """RE.is_3overhang() -> bool.
1372
1373 True if the enzyme produces 3' overhang sticky end.
1374
1375 see also:
1376 RE.is_5overhang()
1377 RE.is_blunt()
1378 RE.is_unknown()
1379 """
1380 return True
1381
1382 @classmethod
1384 """RE.overhang() -> str. type of overhang of the enzyme.,
1385
1386 can be "3' overhang", "5' overhang", "blunt", "unknown"
1387 """
1388 return "3' overhang"
1389
1390 @classmethod
1392 """RE.compatible_end() -> list.
1393
1394 list of all the enzymes that share compatible end with RE.
1395 """
1396 if not batch:
1397 batch = AllEnzymes
1398 r = sorted(x for x in iter(AllEnzymes) if x.is_3overhang() and
1399 x % cls)
1400 return r
1401
1402 @classmethod
1404 """RE._mod1(other) -> bool.
1405
1406 for internal use only
1407
1408 test for the compatibility of restriction ending of RE and other.
1409 """
1410
1411
1412
1413 if issubclass(other, Ov3):
1414 return cls._mod2(other)
1415 else:
1416 return False
1417
1420 """Implement the methods specific to the enzymes for which the overhang
1421 and the cut are not variable.
1422
1423 Typical example : EcoRI -> G^AATT_C
1424 The overhang will always be AATT
1425 Notes:
1426 Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
1427 Their overhang is always the same : blunt!
1428
1429 Internal use only. Not meant to be instantiated."""
1430
1431 @classmethod
1466
1467 @classmethod
1469 """RE.is_defined() -> bool.
1470
1471 True if the sequence recognised and cut is constant,
1472 i.e. the recognition site is not degenerated AND the enzyme cut inside
1473 the site.
1474
1475 see also:
1476 RE.is_ambiguous()
1477 RE.is_unknown()
1478 """
1479 return True
1480
1481 @classmethod
1483 """RE.is_ambiguous() -> bool.
1484
1485 True if the sequence recognised and cut is ambiguous,
1486 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1487 the site.
1488
1489 see also:
1490 RE.is_defined()
1491 RE.is_unknown()
1492 """
1493 return False
1494
1495 @classmethod
1497 """RE.is_unknown() -> bool.
1498
1499 True if the sequence is unknown,
1500 i.e. the recognition site has not been characterised yet.
1501
1502 see also:
1503 RE.is_defined()
1504 RE.is_ambiguous()
1505 """
1506 return False
1507
1508 @classmethod
1510 """RE.elucidate() -> str
1511
1512 return a representation of the site with the cut on the (+) strand
1513 represented as '^' and the cut on the (-) strand as '_'.
1514 ie:
1515 >>> EcoRI.elucidate() # 5' overhang
1516 'G^AATT_C'
1517 >>> KpnI.elucidate() # 3' overhang
1518 'G_GTAC^C'
1519 >>> EcoRV.elucidate() # blunt
1520 'GAT^_ATC'
1521 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1522 '? GTATAC ?'
1523 >>>
1524 """
1525 f5 = cls.fst5
1526 f3 = cls.fst3
1527 site = cls.site
1528 if cls.cut_twice():
1529 re = 'cut twice, not yet implemented sorry.'
1530 elif cls.is_5overhang():
1531 if f5 == f3 == 0:
1532 re = 'N^' + cls.site + '_N'
1533 elif f3 == 0:
1534 re = site[:f5] + '^' + site[f5:] + '_N'
1535 else:
1536 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1537 elif cls.is_blunt():
1538 re = site[:f5] + '^_' + site[f5:]
1539 else:
1540 if f5 == f3 == 0:
1541 re = 'N_' + site + '^N'
1542 else:
1543 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1544 return re
1545
1546 @classmethod
1548 """RE._mod2(other) -> bool.
1549
1550 for internal use only
1551
1552 test for the compatibility of restriction ending of RE and other.
1553 """
1554
1555
1556
1557 if other.ovhgseq == cls.ovhgseq:
1558 return True
1559 elif issubclass(other, Ambiguous):
1560 return other._mod2(cls)
1561 else:
1562 return False
1563
1566 """Implement the methods specific to the enzymes for which the overhang
1567 is variable.
1568
1569 Typical example : BstXI -> CCAN_NNNN^NTGG
1570 The overhang can be any sequence of 4 bases.
1571 Notes:
1572 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1573 There overhang is always the same : blunt!
1574
1575 Internal use only. Not meant to be instantiated.
1576 """
1577
1578 @classmethod
1606
1607 @classmethod
1609 """RE.is_defined() -> bool.
1610
1611 True if the sequence recognised and cut is constant,
1612 i.e. the recognition site is not degenerated AND the enzyme cut inside
1613 the site.
1614
1615 see also:
1616 RE.is_ambiguous()
1617 RE.is_unknown()
1618 """
1619 return False
1620
1621 @classmethod
1623 """RE.is_ambiguous() -> bool.
1624
1625 True if the sequence recognised and cut is ambiguous,
1626 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1627 the site.
1628
1629 see also:
1630 RE.is_defined()
1631 RE.is_unknown()
1632 """
1633 return True
1634
1635 @classmethod
1637 """RE.is_unknown() -> bool.
1638
1639 True if the sequence is unknown,
1640 i.e. the recognition site has not been characterised yet.
1641
1642 see also:
1643 RE.is_defined()
1644 RE.is_ambiguous()
1645 """
1646 return False
1647
1648 @classmethod
1650 """RE._mod2(other) -> bool.
1651
1652 for internal use only
1653
1654 test for the compatibility of restriction ending of RE and other.
1655 """
1656
1657
1658
1659 if len(cls.ovhgseq) != len(other.ovhgseq):
1660 return False
1661 else:
1662 se = cls.ovhgseq
1663 for base in se:
1664 if base in 'ATCG':
1665 pass
1666 if base in 'N':
1667 se = '.'.join(se.split('N'))
1668 if base in 'RYWMSKHDBV':
1669 expand = '[' + matching[base] + ']'
1670 se = expand.join(se.split(base))
1671 if re.match(se, other.ovhgseq):
1672 return True
1673 else:
1674 return False
1675
@classmethod
def elucidate(cls):
    """RE.elucidate() -> str

    Return a representation of the site with the cut on the (+) strand
    represented as '^' and the cut on the (-) strand as '_'.
    Positions that fall outside the recognition site are padded with 'N'.

    Examples (as given for the enzyme classes of this module):
        EcoRI.elucidate()   # 5' overhang -> 'G^AATT_C'
        KpnI.elucidate()    # 3' overhang -> 'G_GTAC^C'
        EcoRV.elucidate()   # blunt       -> 'GAT^_ATC'
        SnaI.elucidate()    # NotDefined  -> '? GTATAC ?'

    Raises ValueError for a blunt cutter whose cut position lies inside
    the site (0 <= fst5 <= len(site)); that case is not handled here.
    """
    f5 = cls.fst5
    f3 = cls.fst3
    length = len(cls)
    site = cls.site
    # The result is called ``rep`` rather than ``re`` so that the regular
    # expression module used elsewhere in this file is not shadowed.
    if cls.cut_twice():
        rep = 'cut twice, not yet implemented sorry.'
    elif cls.is_5overhang():
        if f3 == f5 == 0:
            rep = 'N^' + site + '_N'
        elif 0 <= f5 <= length and 0 <= f3 + length <= length:
            # Both cuts fall inside the site.
            rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        elif 0 <= f5 <= length:
            rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
        elif 0 <= f3 + length <= length:
            rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
        elif f3 + length < 0:
            # Both cuts are upstream of the site.  The original wrote
            # 'N^' * abs(f5) * 'N', which multiplies a str by a str and
            # raises TypeError; concatenation is what the sibling branches do.
            rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' + site
        elif f5 > length:
            rep = (site + (f5 - length) * 'N' + '^'
                   + (length + f3 - f5) * 'N' + '_N')
        else:
            rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
    elif cls.is_blunt():
        if f5 < 0:
            rep = 'N^_' + abs(f5) * 'N' + site
        elif f5 > length:
            rep = site + (f5 - length) * 'N' + '^_N'
        else:
            raise ValueError('%s.easyrepr() : error f5=%i'
                             % (cls.name, f5))
    else:
        # Remaining case: 3' overhang.
        if f3 == 0:
            if f5 == 0:
                rep = 'N_' + site + '^N'
            else:
                rep = site + '_' + (f5 - length) * 'N' + '^N'
        elif 0 < f3 + length <= length and 0 <= f5 <= length:
            rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
        elif 0 < f3 + length <= length:
            rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
        elif 0 <= f5 <= length:
            rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
        elif f3 > 0:
            rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
        elif f5 < 0:
            rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^'
                   + abs(f5) * 'N' + site)
        else:
            rep = ('N_' + abs(f3 + length) * 'N' + site
                   + (f5 - length) * 'N' + '^N')
    return rep
1744
1747 """Implement the methods specific to the enzymes for which the overhang
1748 is not characterised.
1749
1750 Correspond to NoCut and Unknown.
1751
1752 Internal use only. Not meant to be instantiated.
1753 """
1754
1755 @classmethod
1779
1780 @classmethod
1782 """RE.is_defined() -> bool.
1783
1784 True if the sequence recognised and cut is constant,
1785 i.e. the recognition site is not degenerated AND the enzyme cut inside
1786 the site.
1787
1788 see also:
1789 RE.is_ambiguous()
1790 RE.is_unknown()
1791 """
1792 return False
1793
1794 @classmethod
1796 """RE.is_ambiguous() -> bool.
1797
1798 True if the sequence recognised and cut is ambiguous,
1799 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1800 the site.
1801
1802 see also:
1803 RE.is_defined()
1804 RE.is_unknown()
1805 """
1806 return False
1807
1808 @classmethod
1810 """RE.is_unknown() -> bool.
1811
1812 True if the sequence is unknown,
1813 i.e. the recognition site has not been characterised yet.
1814
1815 see also:
1816 RE.is_defined()
1817 RE.is_ambiguous()"""
1818 return True
1819
1820 @classmethod
1822 """RE._mod2(other) -> bool.
1823
1824 for internal use only
1825
1826 test for the compatibility of restriction ending of RE and other.
1827 """
1828
1829
1830
1831
1832
1833
1834
1835 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
1836 % (str(cls), str(other), str(cls)))
1837
1838 @classmethod
1840 """RE.elucidate() -> str
1841
1842 return a representation of the site with the cut on the (+) strand
1843 represented as '^' and the cut on the (-) strand as '_'.
1844 ie:
1845 >>> EcoRI.elucidate() # 5' overhang
1846 'G^AATT_C'
1847 >>> KpnI.elucidate() # 3' overhang
1848 'G_GTAC^C'
1849 >>> EcoRV.elucidate() # blunt
1850 'GAT^_ATC'
1851 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1852 '? GTATAC ?'
1853 >>>
1854 """
1855 return '? %s ?' % cls.site
1856
1859
1860
1861
1862
1863 """Implement the methods specific to the enzymes which are commercially
1864 available.
1865
1866 Internal use only. Not meant to be instantiated.
1867 """
1868
1869 @classmethod
1871 """RE.suppliers() -> print the suppliers of RE."""
1872 for s in cls.suppl:
1873 print(suppliers_dict[s][0] + ',')
1874 return
1875
1876 @classmethod
1878 """RE.supplier_list() -> list.
1879
1880 list of the supplier names for RE.
1881 """
1882 return [v[0] for k, v in suppliers_dict.items() if k in cls.suppl]
1883
1884 @classmethod
1886 """RE.buffers(supplier) -> string.
1887
1888 not implemented yet.
1889 """
1890 return
1891
1892 @classmethod
1894 """RE.iscomm() -> bool.
1895
1896 True if RE has suppliers.
1897 """
1898 return True
1899
1902 """Implement the methods specific to the enzymes which are not commercially
1903 available.
1904
1905 Internal use only. Not meant to be instantiated.
1906 """
1907
1908 @staticmethod
1910 """RE.suppliers() -> print the suppliers of RE."""
1911 return None
1912
1913 @classmethod
1915 """RE.supplier_list() -> list.
1916
1917 list of the supplier names for RE.
1918 """
1919 return []
1920
1921 @classmethod
1923 """RE.buffers(supplier) -> string.
1924
1925 not implemented yet.
1926 """
1927 raise TypeError("Enzyme not commercially available.")
1928
1929 @classmethod
1931 """RE.iscomm() -> bool.
1932
1933 True if RE has suppliers.
1934 """
1935 return False
1936
1946
def __init__(self, first=(), suppliers=()):
    """RestrictionBatch([sequence]) -> new RestrictionBatch.

    first     -- iterable of enzymes; each item is passed through
                 self.format() before being stored.
    suppliers -- iterable of supplier codes (keys of suppliers_dict); every
                 enzyme listed for these suppliers is added to the batch.

    Raises KeyError if a supplier code is not in suppliers_dict.
    """
    first = [self.format(x) for x in first]
    # NOTE(review): eval() is applied to the enzyme names stored in
    # suppliers_dict, not to caller-supplied text; keep it that way.
    first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
    set.__init__(self, first)
    self.mapping = dict.fromkeys(self)
    self.already_mapped = None
    # add_supplier() appends to, and current_suppliers() reads from,
    # self.suppliers; without this assignment both raise AttributeError.
    self.suppliers = list(suppliers)
1954
1956 if len(self) < 5:
1957 return '+'.join(self.elements())
1958 else:
1959 return '...'.join(('+'.join(self.elements()[:2]),
1960 '+'.join(self.elements()[-2:])))
1961
1963 return 'RestrictionBatch(%s)' % self.elements()
1964
1971
1974
1977
def get(self, enzyme, add=False):
    """B.get(enzyme[, add]) -> enzyme class.

    Look enzyme up in the batch after normalising it with self.format().

    With add true, an enzyme that is missing from B is added and returned.
    With add false (the default), a missing enzyme raises ValueError.
    Whatever self.format() raises for an invalid enzyme is propagated.
    """
    found = self.format(enzyme)
    if found not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % found.__name__)
        self.add(found)
    return found
1995
def lambdasplit(self, func):
    """B.lambdasplit(func) -> RestrictionBatch .

    The new batch will contain only the enzymes for which
    func returns True.
    """
    new = RestrictionBatch()
    # The original stored the selection in ``new._data``; a builtin set
    # ignores that attribute, so the returned batch was always empty.
    # Members of self are already valid, hence add_nocheck().
    for enzyme in filter(func, self):
        new.add_nocheck(enzyme)
    return new
2006
2008 """B.add_supplier(letter) -> add a new set of enzyme to B.
2009
2010 letter represents the suppliers as defined in the dictionary
2011 RestrictionDictionary.suppliers
2012 return None.
2013 raise a KeyError if letter is not a supplier code.
2014 """
2015 supplier = suppliers_dict[letter]
2016 self.suppliers.append(letter)
2017 for x in supplier[1]:
2018 self.add_nocheck(eval(x))
2019 return
2020
2022 """B.current_suppliers() -> add a new set of enzyme to B.
2023
2024 return a sorted list of the suppliers which have been used to
2025 create the batch.
2026 """
2027 suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
2028 return suppl_list
2029
2031 """ b += other -> add other to b, check the type of other."""
2032 self.add(other)
2033 return self
2034
2036 """ b + other -> new RestrictionBatch."""
2037 new = self.__class__(self)
2038 new.add(other)
2039 return new
2040
2042 """B.remove(other) -> remove other from B if other is a
2043 RestrictionType.
2044
2045 Safe set.remove method. Verify that other is a RestrictionType or can
2046 be evaluated to a RestrictionType.
2047 raise a ValueError if other can not be evaluated to a RestrictionType.
2048 raise a KeyError if other is not in B.
2049 """
2050 return set.remove(self, self.format(other))
2051
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Type-checked variant of set.add: other is first normalised by
    self.format(), and any error raised there is propagated.
    Returns whatever set.add returns (None).
    """
    checked = self.format(other)
    return set.add(self, checked)
2060
2062 """B.add_nocheck(other) -> add other to B. don't check type of other.
2063 """
2064 return set.add(self, other)
2065
2083
2085 """B.is_restriction(y) -> bool.
2086
2087 True is y or eval(y) is a RestrictionType.
2088 """
2089 return (isinstance(y, RestrictionType) or
2090 isinstance(eval(str(y)), RestrictionType))
2091
def split(self, *classes, **bool):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Keep the enzymes that satisfy every given class condition.  For each
    class in classes the keyword named after the class says whether the
    enzyme must (true, the default) or must not (false) be a subclass.

    It works but it is slow, so it is only of interest when splitting
    over multiple conditions.
    """
    # ``bool`` shadows the builtin inside this function only; the name is
    # kept so the signature is unchanged.
    def splittest(element):
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            # Reject as soon as membership disagrees with the request.
            if issubclass(element, klass) == (not wanted):
                return False
        return True

    new = RestrictionBatch()
    # The original stored the selection in ``new._data``; a builtin set
    # ignores that attribute, so the returned batch was always empty.
    for enzyme in filter(splittest, self):
        new.add_nocheck(enzyme)
    return new
2115
2117 """B.elements() -> tuple.
2118
2119 give all the names of the enzymes in B sorted alphabetically.
2120 """
2121 l = sorted(str(e) for e in self)
2122 return l
2123
2125 """B.as_string() -> list.
2126
2127 return a list of the name of the elements of B.
2128 """
2129 return [str(e) for e in self]
2130
2131 @classmethod
2133 """B.suppl_codes() -> dict
2134
2135 letter code for the suppliers
2136 """
2137 supply = dict((k, v[0]) for k, v in suppliers_dict.items())
2138 return supply
2139
2140 @classmethod
2142 """B.show_codes() -> letter codes for the suppliers"""
2143 supply = [' = '.join(i) for i in cls.suppl_codes().items()]
2144 print('\n'.join(supply))
2145 return
2146
def search(self, dna, linear=True):
    """B.search(dna) -> dict.

    Map every enzyme of the batch to the result of its own search() on
    dna and return that mapping.  The last (sequence string, linear) pair
    is remembered, so repeating the same query returns the stored mapping
    without searching again.

    dna may be a DNA instance, in which case it is wrapped in a
    FormattedSeq using linear, or a FormattedSeq, whose own linear
    attribute is used.  Anything else raises TypeError.
    """
    # Make sure the cache marker exists even if __init__ did not set it.
    self.already_mapped = getattr(self, "already_mapped", None)
    if isinstance(dna, DNA):
        query = (str(dna), linear)
        if query != self.already_mapped:
            self.already_mapped = query
            fseq = FormattedSeq(dna, linear)
            self.mapping = dict((enz, enz.search(fseq)) for enz in self)
        return self.mapping
    if isinstance(dna, FormattedSeq):
        query = (str(dna), dna.linear)
        if query != self.already_mapped:
            self.already_mapped = query
            self.mapping = dict((enz, enz.search(dna)) for enz in self)
        return self.mapping
    raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                    % type(dna))
2178
2179
2180
2181
2182
2183
2184
2185
class Analysis(RestrictionBatch, PrintFormat):
    """Restriction analysis of one sequence with one batch of enzymes.

    The results of the search are kept in self.mapping (enzyme -> list of
    sites) and can be filtered with the methods below.  Most filters take
    an optional dictionary ``dct``; when it is omitted the full mapping
    is used.
    """

    # NOTE(review): the ``def`` lines of this class were not present in the
    # reviewed listing; signatures are reconstructed from the docstrings
    # and the names used in each body -- confirm against the real file.

    def __init__(self, restrictionbatch=RestrictionBatch(), sequence=DNA(''),
                 linear=True):
        """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

        For most of the methods of this class, if a dictionary is given it
        will be used as the base to calculate the results.
        If no dictionary is given, the results of the search done with the
        RestrictionBatch given at instantiation are used.
        """
        RestrictionBatch.__init__(self, restrictionbatch)
        self.rb = restrictionbatch
        self.sequence = sequence
        self.linear = linear
        # Only search when there is something to search in.
        if self.sequence:
            self.search(self.sequence, self.linear)

    def __repr__(self):
        return 'Analysis(%s,%s,%s)' %\
            (repr(self.rb), repr(self.sequence), self.linear)

    def _sub_set(self, wanted):
        """A._sub_set(other_set) -> dict.

        Internal use only.

        Screen the results through the wanted set: keep only the results
        for which the enzyme is in wanted.
        """
        return dict((k, v) for k, v in self.mapping.items() if k in wanted)

    def _boundaries(self, start, end):
        """A._boundaries(start, end) -> tuple.

        Format the boundaries for use with the methods that limit the
        search to only part of the sequence given to analyse.

        Returns (start, end, test) where test is _test_normal when
        start < end and _test_reverse otherwise (region wrapping around
        the end of the sequence).  Values below 1 are offset by the
        sequence length.  Raises TypeError if start or end is not an int.
        """
        if not isinstance(start, int):
            raise TypeError('expected int, got %s instead' % type(start))
        if not isinstance(end, int):
            raise TypeError('expected int, got %s instead' % type(end))
        if start < 1:
            start += len(self.sequence)
        if end < 1:
            end += len(self.sequence)
        # The original had ``start, end == end, start`` here: a comparison
        # with no effect.  It is dropped rather than turned into a swap,
        # because swapping would make _test_reverse unreachable for
        # start > end, which is the wrap-around case it exists for.
        if start < 1:
            # Was ``start == 1`` (no-op); clamp as evidently intended.
            start = 1
        if start < end:
            return start, end, self._test_normal
        else:
            return start, end, self._test_reverse

    def _test_normal(self, start, end, site):
        """A._test_normal(start, end, site) -> bool.

        Internal use only
        Test if site is in between start and end.
        """
        return start <= site < end

    def _test_reverse(self, start, end, site):
        """A._test_reverse(start, end, site) -> bool.

        Internal use only
        Test if site is in between end and start (for circular sequences).
        """
        return start <= site <= len(self.sequence) or 1 <= site < end

    def print_that(self, dct=None, title='', s1=''):
        """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

        If dct is not given the full dictionary is used.
        """
        if not dct:
            dct = self.mapping
        print("")
        return PrintFormat.print_that(self, dct, title, s1)

    def change(self, **what):
        """A.change(**attribute_name) -> Change attribute of Analysis.

        It is possible to change the width of the shell by setting
        self.ConsoleWidth to what you want.
        self.NameWidth refers to the maximal length of the enzyme name.

        Changing one of these parameters here might not give the results
        you expect. In which case, you can settle back to a 80 columns
        shell or try to change self.Cmodulo and self.PrefWidth in
        PrintFormat until you get it right.

        Changing 'sequence', 'rb' or 'linear' triggers a new search.
        Raises AttributeError for 'Cmodulo', 'PrefWidth' (derived values)
        and for any unknown attribute name.
        """
        for k, v in what.items():
            if k in ('NameWidth', 'ConsoleWidth'):
                setattr(self, k, v)
                self.Cmodulo = self.ConsoleWidth % self.NameWidth
                self.PrefWidth = self.ConsoleWidth - self.Cmodulo
            # String keys are compared with ==; the original used ``is``,
            # which tests identity and is not guaranteed to match.
            elif k == 'sequence':
                setattr(self, 'sequence', v)
                self.search(self.sequence, self.linear)
            elif k == 'rb':
                # Re-initialise in place.  The original rebound ``self``
                # to the None returned by __init__, breaking every later
                # iteration of this loop.
                Analysis.__init__(self, v, self.sequence, self.linear)
            elif k == 'linear':
                setattr(self, 'linear', v)
                self.search(self.sequence, v)
            elif k in ('Indent', 'Maxsize'):
                setattr(self, k, v)
            elif k in ('Cmodulo', 'PrefWidth'):
                # The original formatted an undefined ``name`` here.
                raise AttributeError(
                    'To change %s, change NameWidth and/or ConsoleWidth'
                    % k)
            else:
                raise AttributeError(
                    'Analysis has no attribute %s' % k)
        return

    def full(self, linear=True):
        """A.full() -> dict.

        Full Restriction Map of the sequence.
        The linear argument is accepted but not used.
        """
        return self.mapping

    def blunt(self, dct=None):
        """A.blunt([dct]) -> dict.

        Only the enzymes which have a blunt restriction site.
        """
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_blunt())

    def overhang5(self, dct=None):
        """A.overhang5([dct]) -> dict.

        Only the enzymes which have a 5' overhang restriction site.
        """
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_5overhang())

    def overhang3(self, dct=None):
        """A.overhang3([dct]) -> dict.

        Only the enzymes which have a 3' overhang restriction site.
        """
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_3overhang())

    def defined(self, dct=None):
        """A.defined([dct]) -> dict.

        Only the enzymes for which is_defined() is true.
        """
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if k.is_defined())

    def with_sites(self, dct=None):
        """A.with_sites([dct]) -> dict.

        Enzymes which have at least one site in the sequence.
        """
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if v)

    def without_site(self, dct=None):
        """A.without_site([dct]) -> dict.

        Enzymes which have no site in the sequence.
        """
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if not v)

    def with_N_sites(self, N, dct=None):
        """A.with_N_sites(N [, dct]) -> dict.

        Enzymes which cut the sequence N times.
        """
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if len(v) == N)

    def with_number_list(self, list, dct=None):
        """A.with_number_list(list [, dct]) -> dict.

        Enzymes whose number of sites is one of the values in list.
        """
        # ``list`` shadows the builtin; kept so the signature is unchanged.
        if dct is None:
            dct = self.mapping
        return dict((k, v) for k, v in dct.items() if len(v) in list)

    def with_name(self, names, dct=None):
        """A.with_name(list_of_names [, dct]) -> dict.

        Limit the search to the enzymes named in list_of_names.
        Names that are not in AllEnzymes are reported on stdout and
        ignored.  Without dct a new search of self.sequence is done with
        the known names.
        """
        # Build a filtered copy.  The original deleted from ``names``
        # while enumerating it (skipping the element after each deletion)
        # and formatted an undefined ``name`` in the message.
        known = []
        for enzyme in names:
            if enzyme in AllEnzymes:
                known.append(enzyme)
            else:
                print("no data for the enzyme: %s" % enzyme)
        if dct is None:
            return RestrictionBatch(known).search(self.sequence)
        return dict((n, dct[n]) for n in known if n in dct)

    def with_site_size(self, site_size, dct=None):
        """A.with_site_size(site_size [, dct]) -> dict.

        Limit the search to the enzymes whose site is of size <site_size>.
        """
        sites = [name for name in self if name.size == site_size]
        if dct is None:
            return RestrictionBatch(sites).search(self.sequence)
        # The original tested ``k in site_size`` (membership in an int,
        # a TypeError); the selection computed above is what is meant.
        return dict((k, v) for k, v in dct.items() if k in sites)

    def only_between(self, start, end, dct=None):
        """A.only_between(start, end[, dct]) -> dict.

        Enzymes that cut the sequence only in between start and end.
        Enzymes without any site are left out.
        """
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = dict(dct)
        for key, sites in dct.items():
            if not sites:
                del d[key]
                continue
            for site in sites:
                if test(start, end, site):
                    continue
                else:
                    # One site outside the region disqualifies the enzyme.
                    del d[key]
                    break
        return d

    def between(self, start, end, dct=None):
        """A.between(start, end [, dct]) -> dict.

        Enzymes that cut the sequence at least in between start and end.
        They may cut outside as well.
        """
        start, end, test = self._boundaries(start, end)
        d = {}
        if dct is None:
            dct = self.mapping
        for key, sites in dct.items():
            for site in sites:
                if test(start, end, site):
                    d[key] = sites
                    break
        return d

    def show_only_between(self, start, end, dct=None):
        """A.show_only_between(start, end [, dct]) -> dict.

        Enzymes that cut the sequence in between start and end, with
        their site lists reduced to the sites inside that region.
        When start > end the region wraps around the end of the sequence.
        """
        # The original iterated the dict returned by between() directly
        # (yielding keys only) and used an undefined ``k``.
        inside = self.between(start, end, dct)
        if start <= end:
            d = [(k, [vv for vv in v if start <= vv <= end])
                 for k, v in inside.items()]
        else:
            d = [(k, [vv for vv in v if start <= vv or vv <= end])
                 for k, v in inside.items()]
        return dict(d)

    def only_outside(self, start, end, dct=None):
        """A.only_outside(start, end [, dct]) -> dict.

        Enzymes that cut the sequence outside of the region
        in between start and end but do not cut inside.
        Enzymes without any site are left out.
        """
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = dict(dct)
        for key, sites in dct.items():
            if not sites:
                del d[key]
                continue
            for site in sites:
                if test(start, end, site):
                    # One site inside the region disqualifies the enzyme.
                    del d[key]
                    break
                else:
                    continue
        return d

    def outside(self, start, end, dct=None):
        """A.outside((start, end [, dct]) -> dict.

        Enzymes that cut outside the region in between start and end.
        No test is made to know if they cut or not inside this region.
        """
        start, end, test = self._boundaries(start, end)
        if dct is None:
            dct = self.mapping
        d = {}
        for key, sites in dct.items():
            for site in sites:
                if test(start, end, site):
                    continue
                else:
                    d[key] = sites
                    break
        return d

    def do_not_cut(self, start, end, dct=None):
        """A.do_not_cut(start, end [, dct]) -> dict.

        Enzymes that do not cut the region in between start and end:
        those without any site plus those cutting only outside.
        """
        if dct is None:
            dct = self.mapping
        # Both parts are taken from the same dictionary; the original
        # always took the site-less enzymes from the full mapping.
        d = self.without_site(dct)
        d.update(self.only_outside(start, end, dct))
        return d
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
# Build one class per enzyme and sort them into the two module-level
# batches: enzymes for which is_comm() is true go to CommOnly, the others
# to NonComm.
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
for TYPE, (bases, enzymes) in typedict.items():
    # Each typedict entry pairs a sequence of base-class names with the
    # names of the enzymes that share those bases.  The names are turned
    # into the class objects defined in this module with eval().
    # NOTE(review): eval() input comes from typedict, not from callers.
    bases = tuple(eval(x) for x in bases)
    # T is created with type.__new__ on RestrictionType and is then called
    # once per enzyme with (name, bases, attribute dict).
    # NOTE(review): presumably this bypasses RestrictionType.__init__ for
    # the intermediate type -- confirm against RestrictionType.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        # enzymedict[k] supplies the class attributes of enzyme k.
        newenz = T(k, bases, enzymedict[k])
        # Members were just built here, so the unchecked add is used.
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)

# Union of both batches.
AllEnzymes = CommOnly | NonComm

# Publish every enzyme under its own name in the module namespace and in
# __all__.
names = [str(x) for x in AllEnzymes]
try:
    # Under Python 2 the list comprehension above leaks ``x``; under
    # Python 3 it does not, hence the NameError guard.
    del x
except NameError:
    pass
# At module level locals() is the module namespace, so this defines one
# global per enzyme name.
locals().update(dict(zip(names, AllEnzymes)))
__all__ = ['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm'] + names
# Remove the loop/helper variables from the module namespace.
del k, enzymes, TYPE, bases, names
2589